import jieba
from tensorflow.keras.preprocessing.text import Tokenizer
import joblib

def dm_onehot_gen():
    # Fit a Tokenizer on the vocabulary and print a one-hot vector per token.
    vocabs = {"周杰伦", "陈奕迅", "王力宏", "李宗盛", "吴亦凡", "鹿晗"}
    mytokenizer = Tokenizer()
    mytokenizer.fit_on_texts(vocabs)
    for vocab in vocabs:
        zero_list = [0] * len(vocabs)
        # word_index is 1-based, so shift down for a 0-based list position
        idx = mytokenizer.word_index[vocab] - 1
        zero_list[idx] = 1
        print(vocab, 'onehot', zero_list)

    # Persist the fitted tokenizer so dm_onehot_use can reload it
    path = './mytokenizer'
    joblib.dump(mytokenizer, path)
    print('tokenizer saved successfully')
    print(mytokenizer.word_index)
    print(mytokenizer.index_word)
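
# A minimal sketch (my addition, not in the original): Keras Tokenizer can build
# the same binary vectors directly via texts_to_matrix(mode='binary'); column 0
# is reserved by Keras, so each row carries a single 1 at the word_index position.
def dm_onehot_matrix():
    vocabs = ["周杰伦", "陈奕迅", "王力宏", "李宗盛", "吴亦凡", "鹿晗"]
    mytokenizer = Tokenizer()
    mytokenizer.fit_on_texts(vocabs)
    matrix = mytokenizer.texts_to_matrix(vocabs, mode='binary')
    for vocab, row in zip(vocabs, matrix):
        print(vocab, 'onehot', [int(v) for v in row[1:]])  # drop reserved column 0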

def dm_onehot_use():
    # Reload the saved tokenizer and look up the one-hot vector for one token.
    vocabs = {"周杰伦", "陈奕迅", "王力宏", "李宗盛", "吴亦凡", "鹿晗"}
    path = './mytokenizer'
    mytokenizer = joblib.load(path)
    token = '李宗盛'
    # An out-of-vocabulary token would raise KeyError here (see sketch below)
    idx = mytokenizer.word_index[token] - 1
    zero_list = [0] * len(vocabs)
    zero_list[idx] = 1
    print(token, 'onehot', zero_list)
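
# A minimal sketch (my addition; the unused jieba import suggests this intent):
# segment a raw Chinese sentence with jieba first, then one-hot encode each word.
def dm_onehot_jieba():
    sentence = '周杰伦和陈奕迅都是歌手'  # hypothetical example sentence
    words = jieba.lcut(sentence)  # e.g. ['周杰伦', '和', '陈奕迅', '都', '是', '歌手']
    mytokenizer = Tokenizer()
    mytokenizer.fit_on_texts(words)
    for word in words:
        zero_list = [0] * len(mytokenizer.word_index)
        zero_list[mytokenizer.word_index[word] - 1] = 1
        print(word, 'onehot', zero_list)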


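# A minimal sketch (my addition): word_index lookup raises KeyError for tokens
# outside the fitted vocabulary, a known limitation of one-hot encoding.
def dm_onehot_oov():
    path = './mytokenizer'
    mytokenizer = joblib.load(path)
    token = '李荣浩'  # hypothetical name that was never in vocabs
    try:
        mytokenizer.word_index[token]
    except KeyError:
        print(token, 'is out of vocabulary and cannot be one-hot encoded')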

if __name__ == '__main__':
    print('start')
    dm_onehot_gen()  # build and save the tokenizer first
    dm_onehot_use()  # then reload it for the lookup


